#####################################################################################################
### TITLE: texas_pm25.R
### PURPOSE: This code produces the maps in the appendix (A1 and C4)
####################################################################################################
rm(list = ls())
# Set CRAN mirror explicitly
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install required packages if they are not already installed
packages <- c("sf", "dplyr", "ggplot2", "showtext")
installed <- packages %in% rownames(installed.packages())
if (any(!installed)) {
  install.packages(packages[!installed])
}


if (interactive()) {
  if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
    script_path <- rstudioapi::getActiveDocumentContext()$path
    base_dir <- dirname(script_path)
  } else {
    base_dir <- getwd()  # fallback
  }
} else {
  args <- commandArgs(trailingOnly = FALSE)
  script_path <- sub("--file=", "", args[grep("--file=", args)])
  base_dir <- dirname(normalizePath(script_path))
}

# Go 3 directories up
for (i in 1:2) {
  base_dir <- dirname(base_dir)
}

cat("Base directory is:", base_dir, "\n")

setwd(base_dir)
#Load necessary libraries
library(sf)
library(dplyr)
library(ggplot2)
library(showtext)


cat("✅ R script is running\n")

font_add("Arial", "C:/Windows/Fonts/Arial.ttf")  # adjust path as needed
showtext_auto()

# Construct paths relative to the base directory
urban_shapefile_path <- file.path(base_dir, "raw_data/Shapefiles/tl_2010_us_uac10/tl_2010_us_uac10.shp")
state_shapefile_path <- file.path(base_dir, "raw_data/State Boundaries/cb_2023_us_state_500k/cb_2023_us_state_500k.shp")
zipcode_shapefile_path <- file.path(base_dir, "raw_data/Shapefiles/Zipcodes/tl_2010_48_zcta510/tl_2010_48_zcta510.shp")

# Read the urban area shapefile
urban_shape <- st_read(urban_shapefile_path)
# Read the state shapefile
state_shape <- st_read(state_shapefile_path)
# Read the zipcode shapefile
zip_shape <- st_read(zipcode_shapefile_path)
# Read the complaints data 
facilities_complaints <- read.csv(file.path(base_dir, "processed_data/data_for_maps.csv"))

# Merge the CSV data with the zipcode shapefile based on ZipCode ID
merged_data <- merge(x = zip_shape, y = facilities_complaints, by.x = "ZCTA5CE10", by.y = "ZipCode", all.x = TRUE)

# Replace NA values for facilities and complaints with 0's
merged_data$count_RN <- ifelse(is.na(merged_data$count_RN), 0, merged_data$count_RN)
merged_data$complaints_pc <- ifelse(is.na(merged_data$complaints_pc), 0, merged_data$complaints_pc)

# Winsorize the complaints data
top_cutoff_point <- quantile(merged_data$complaints_pc, 0.99)
merged_data$complaints_winsor <- ifelse(merged_data$complaints_pc > top_cutoff_point, top_cutoff_point, merged_data$complaints_pc)

# Filter the state shapefile to include only Texas
texas_shape <- state_shape %>% filter(STATEFP == "48")  # '48' is the FIPS code for Texas

# Overlay the urban area shapefiles with the Texas state shapefile
texas_urban_shape <- st_intersection(urban_shape, texas_shape)

summary(merged_data$complaints_pc)

# Plot the complaints data
complaints_cont <- ggplot() +
  geom_sf(data = merged_data, aes(fill = complaints_winsor), color = NA) +  # Use winsorized data
  geom_sf(data = texas_urban_shape, fill = NA, color = "black", size = 0.1) +  # Thinner boundaries
  geom_sf(data = texas_shape, fill = NA, color = "black") +
  scale_fill_viridis_c(
    name = "Number of Complaints\n(per 1,000 people)",
    option = "magma",
    direction = -1,
    na.value = "grey90",  # Set a solid color for missing data
    limits = c(0, 9),
    guide = guide_colorbar(
      direction = "vertical",
      barheight = unit(100, units = "mm"),
      barwidth = unit(10, units = "mm"),
      draw.ulim = FALSE,
      title.position = 'top',
      title.vjust = 1,
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Arial", color = "#22211d"), 
    plot.background = element_rect(fill = "#FFFFFF", color = NA), 
    panel.background = element_rect(fill = "#FFFFFF", color = NA), 
    legend.background = element_rect(fill = "transparent", color = NA),
    plot.title = element_text(size = 20, hjust = 0.5, color = "#4e4d47", margin = margin(b = 10)),
    plot.subtitle = element_text(size = 16, hjust = 0.5, color = "#4e4d47", margin = margin(b = 10)),
    plot.caption = element_text(size = 10, color = "#4e4d47", margin = margin(t = 10)),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    legend.position = 'right',
    legend.direction = 'vertical',
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    panel.grid.major = element_blank(),  # Remove major grid lines
    panel.grid.minor = element_blank()   # Remove minor grid lines
  ) +
  coord_sf()

# Save the plot
ggsave(
  filename = "output/figures/Figure_C4_map.pdf",  # Specify the filename
  plot = complaints_cont,                 # Specify the plot object
  width = 10,                             # Width in inches
  height = 8,                             # Height in inches
  dpi = 300                               # Resolution in dots per inch
)

### Count of Only Title V Facilities

facilities_titleV <- read.csv("processed_data/data_for_maps_TitleV.csv")
merged_data2 <- merge(x = zip_shape, y = facilities_titleV, by.x = "ZCTA5CE10", by.y = "ZipCode", all.x=T)
merged_data2$count_RN <- ifelse(is.na(merged_data2$count_RN)==T, 0, merged_data2$count_RN)

# Winsorize the count_RN data
top_cutoff_point2 <- quantile(merged_data2$count_RN, 0.99)
merged_data2$count_RN_winsor <- ifelse(merged_data2$count_RN > top_cutoff_point2, top_cutoff_point2, merged_data2$count_RN)

summary(facilities_titleV$count_RN)

# Filter the state shapefile to include only Texas
texas_shape <- state_shape %>% filter(STATEFP == "48")  # '48' is the FIPS code for Texas

# Overlay the urban area shapefiles with the Texas state shapefile
texas_urban_shape <- st_intersection(urban_shape, texas_shape)

### Count of Title V and NSR Facilities

facilities <- read.csv("processed_data/data_for_maps.csv")
merged_data3 <- merge(x = zip_shape, y = facilities, by.x = "ZCTA5CE10", by.y = "ZipCode", all.x=T)
merged_data3$count_RN <- ifelse(is.na(merged_data3$count_RN)==T, 0, merged_data3$count_RN)

summary(merged_data3$complaints_pc)
# Winsorize the count_RN data
top_cutoff_point3 <- quantile(merged_data3$count_RN, 0.99)
merged_data3$count_RN_winsor <- ifelse(merged_data3$count_RN > top_cutoff_point3, top_cutoff_point3, merged_data3$count_RN)

summary(facilities$count_RN)

# Filter the state shapefile to include only Texas
texas_shape <- state_shape %>% filter(STATEFP == "48")  # '48' is the FIPS code for Texas

# Overlay the urban area shapefiles with the Texas state shapefile
texas_urban_shape <- st_intersection(urban_shape, texas_shape)

# Plot the facilities data
titleV_plot <- ggplot() +
  geom_sf(data = merged_data3, aes(fill = count_RN_winsor), color = NA) +  # Use winsorized data
  geom_sf(data = texas_urban_shape, fill = NA, color = "black", size = 0.1) +  # Thinner boundaries
  geom_sf(data = texas_shape, fill = NA, color = "black") +

  scale_fill_viridis_c(
    name = "Number of Facilities\n(per Zip Code)",
    option = "magma",
    direction = -1,
    na.value = "grey90",  # Set a solid color for missing data
    limits = c(0, max(merged_data3$count_RN_winsor, na.rm = TRUE)),
    guide = guide_colorbar(
      direction = "vertical",
      barheight = unit(100, units = "mm"),
      barwidth = unit(10, units = "mm"),
      draw.ulim = FALSE,
      title.position = 'top',
      title.vjust = 1,
      title.hjust = 0.5,
      label.hjust = 0.5
    )
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Arial", color = "#22211d"), 
    plot.background = element_rect(fill = "#FFFFFF", color = NA), 
    panel.background = element_rect(fill = "#FFFFFF", color = NA), 
    legend.background = element_rect(fill = "transparent", color = NA),
    plot.title = element_text(size = 20, hjust = 0.5, color = "#4e4d47", margin = margin(b = 10)),
    plot.subtitle = element_text(size = 16, hjust = 0.5, color = "#4e4d47", margin = margin(b = 10)),
    plot.caption = element_text(size = 10, color = "#4e4d47", margin = margin(t = 10)),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    legend.position = 'right',
    legend.direction = 'vertical',
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    panel.grid.major = element_blank(),  # Remove major grid lines
    panel.grid.minor = element_blank()   # Remove minor grid lines
  ) +
  coord_sf()

# Save the plot
ggsave(
  filename = "output/figures/Figure_A1_map.pdf",  # Specify the filename
  plot = titleV_plot,                       # Specify the plot object
  width = 10,                               # Width in inches
  height = 8,                               # Height in inches
  dpi = 300                                 # Resolution in dots per inch
)
